### Analyses reported in main text #####
### Alternative reproductive tactics of 
### unflanged and flanged male orang-utans revisited

rm(list=ls())
library(plyr)
library(glmmTMB)
library(multcomp)
library(DHARMa)
library(MuMIn)
library(ggplot2)
library(ggeffects)
library(effects)
library(cowplot)
library(car)
library(magick)
library(performance)
library(survival)
library(survminer)
library(coxme)
library(tidyverse)

# import data set including all longitudinal and cross-sectional observational data on males (focal and party data)
dd <- read.csv("ARTs_dataset_MSmain_Apr23.csv",
               stringsAsFactors = TRUE)

# set levels of morph
dd$Morph <- factor(dd$Morph,
                   levels=c("ufm", "flm"))

# Subset with only full day focal follows
FL <- droplevels(subset(dd, FollowDay==1)) ## only when males were focal
NN <- droplevels(subset(FL, FollowType=="NN")) ## only NN-follows
NN1 <- droplevels(subset(NN, !is.na(FAI))) ## exclude days with unknown FAI
NN2 <-  droplevels(subset(NN1, !is.na(ObsTime))) ## exclude unknown Observation hours

# Association frequency - longitudinal data set ---------------------------------------------------

longiN <- droplevels(subset(NN2, transition == "yes")) # only longitudinal NN data
NNL1 <- droplevels(subset(longiN, !is.na(FemAssocH))) ## exclude where duration of association unknown
NNL2 <- droplevels(subset(NNL1, ID!="Vr")) ## Vr was not seen for 10 years and then identified genetically as the same male with flanges
NNL2$zFAI <- ave(NNL2$FAI, NNL2$Site, FUN=scale)

### Number of females in association per full-day focal follow
### all data based on full-day focal follows (i.e. 24h), no offset term

femNr0 <- glmmTMB(NrFemAssoc ~ 1  + (1|ID/Year/Month),
                  data=NNL2, 
                  family=poisson(link = "log"))

# full model
femNrALLaltTMB<- glmmTMB(NrFemAssoc ~ Site +zFAI+YearSinceFlanged+DaysFollowed+FirstDayOppoSex+ 
                           +(1|ID/Year/Month),
                         data=NNL2,
                         family=poisson(link = "log"))

# try if splines improve model fit, they do not
library(splines)
femNrALLaltTMBsp<- glmmTMB(NrFemAssoc ~ Site +zFAI+ns(YearSinceFlanged, 2)+DaysFollowed+FirstDayOppoSex+ 
                             +(1|ID/Year/Month),
                           data=NNL2,
                           family=poisson(link = "log"))

# testing interaction terms
femNrALLIATMB <- glmmTMB(NrFemAssoc ~ Site*YearSinceFlanged + zFAI+DaysFollowed+FirstDayOppoSex+ 
                           +(1|ID/Year/Month),
                         data=NNL2, 
                         family=poisson(link = "log"))

anova(femNr0, femNrALLaltTMB, femNrALLaltTMBsp, femNrALLIATMB) # spline until 4 knots do not improve model
anova(femNr0, femNrALLaltTMB, femNrALLIATMB)
summary(femNrALLaltTMB)
confint(femNrALLaltTMB) # calculate confidence intervals for estimates
r.squaredGLMM(femNrALLaltTMB)
AIC(femNrALLaltTMB) - AIC(femNr0)

# check residuals and model assumptions
check_collinearity(femNrALLaltTMB) # no collinearity issues
plot(resid(femNrALLaltTMB))
check_overdispersion(femNrALLaltTMB) # no significant evidence for overdispersion
res <-simulateResiduals(femNrALLaltTMB, plot = T) # some deviation for dispersion test and residuals vs. predicted values

# Figure 2A reported in MS
FemLNrPred <- ggpredict(femNrALLaltTMB, terms = c("YearSinceFlanged")) ## model predictions of full model 

FemNrLongi <- ggplot()+
  geom_ribbon(data= FemLNrPred,
              mapping = aes(x=x, ymin=conf.low, ymax=conf.high),
              alpha=0.1)+
  geom_line(data= FemLNrPred,
            mapping = aes(x, predicted), size=1.2)+
  geom_jitter(NNL2,
              mapping= aes(YearSinceFlanged, NrFemAssoc, col=Morph, pch=Site),
              width=0.15, height=0.1, alpha=0.8)+
  theme_set(theme_cowplot())+
  theme(legend.box.background = element_rect(colour="black"),
        legend.box.margin = margin(2,4,2,4),
        title=element_text(size=12),
        legend.position = c(0.65,0.9))+
  geom_vline(xintercept=0, col="#757575", lty=2)+
  annotate("text", x= -5, y=-0.22, #fontface="italic", 
           label= "unflanged", col="#757575")+
  annotate("text", x= 5, y=-0.22, #fontface="italic", 
           label= "flanged", col="#757575")+
  labs(x="Years to/since flanged",
       y="Number of females in association
per full-day focal follow",
title = "Longitudinal data") +
  scale_colour_manual(values=c("#A496CF","#3c2375"))+
  guides(col="none")

# female association numbers reported in main text
aggregate(NNL2$NrFemAssoc,
          by=list(NNL2$Morph), 
          FUN=mean)
aggregate(NNL2$NrFemAssoc,
          by=list(NNL2$Morph), 
          FUN=sd)
table(NNL2$Morph)

# overview table of associations
aggregate(cbind(NNL2$NrFemAssoc,NNL2$FemAssocH),
          by=list(NNL2$Site,
                  NNL2$Morph), 
          FUN=mean,na.rm=TRUE)
aggregate(cbind(NNL2$NrFemAssoc,NNL2$FemAssocH),
          by=list(NNL2$Site,
                  NNL2$Morph), 
          FUN=sd,na.rm=TRUE)
table(NNL2$Site,
      NNL2$Morph)

# Association frequency - cross-sectional data set ------------------------------------------------

cross <-  droplevels(subset(NN2, transition=="no")) ## only males that were observed in either morph and not both
cross1 <- droplevels(subset(cross, !is.na(FemAssocH))) # exclude days where association hours are unknown
cross1$zFAI <- ave(cross1$FAI, cross1$Site, FUN=scale) # scale FAI within site
cross1$zAssoc <- ave(cross1$FemAssocH, FUN=scale) ## scale female hours for convergence

### Number of females in association per full-day focal follow
### all data based on full-day focal follows (i.e. 24h), no offset term
femNrCr0 <- glmmTMB(NrFemAssoc ~ 1  +
                      (1|ID/Year/Month),
                    data=cross1, 
                    family=poisson(link = "log"))

# full model
femNrCrALL <- glmmTMB(NrFemAssoc ~ Site +zFAI+Morph+YearSinceFirstSeenTab+FirstDayOppoSex+DaysFollowed+
                        +(1|ID/Year/Month),
                      data=cross1,
                      family=poisson(link = "log"))

# testing interaction terms --> Site*Morph and Morph*zFAI no improved model fit, but site*zFAI
femNrCrALLIA <- glmmTMB(NrFemAssoc ~ Site+zFAI+FirstDayOppoSex+DaysFollowed+YearSinceFirstSeenTab*Morph +
                          +(1|ID/Year/Month),
                        data=cross1, 
                        family=poisson(link = "log"))

## model comparisons
anova(femNrCr0, femNrCrALL, femNrCrALLIA)

# summary full model
summary(femNrCrALL)
drop1(femNrCrALL, test="Chisq")
confint(femNrCrALL)
r.squaredGLMM(femNrCrALL)
AIC(femNrCrALL) - AIC(femNrCr0)
anova(femNrCr0, femNrCrALL)

# check model assumptions
check_overdispersion(femNrCrALL) # no evidence of overdispersion
check_collinearity(femNrCrALL) # some moderate correlation of interaction terms
plot(resid(femNrCrALL))
res <-simulateResiduals(femNrCrALL, plot = T)  # some deviation in dispersion test

# descriptive statistics reported in manuscript by site and morph
aggregate(cbind(cross1$NrFemAssoc,
                cross1$FemAssocH),
          by=list(cross1$Site,
                  cross1$Morph), 
          FUN=mean, na.rm=TRUE)

aggregate(cbind(cross1$NrFemAssoc,
                cross1$FemAssocH),
          by=list(cross1$Site,
                  cross1$Morph), 
          FUN=sd, na.rm=TRUE)

# descriptive statistics reported in manuscript by morph
aggregate(cbind(cross1$NrFemAssoc,
                cross1$FemAssocH),
          by=list(cross1$Morph), 
          FUN=mean, na.rm=TRUE)

aggregate(cbind(cross1$NrFemAssoc,
                cross1$FemAssocH),
          by=list(cross1$Morph), 
          FUN=sd, na.rm=TRUE)

# calculate mean per ID, morph and site for figure
sumMScr <- cross1 %>% group_by(ID, Morph, Site) %>% dplyr::summarize(NrFemAssoc = mean(NrFemAssoc),
                                                                     N = n())

FemCNrPredm <- ggpredict(femNrCrALL, terms = c("Morph", "Site")) ## take predictions of best model 

# Figure 2B reported in MS
FemNrCross <- ggplot()+
  geom_point(data= sumMScr,
             aes(Morph, NrFemAssoc, col=Site, pch=Site, size=N),
             position=position_jitterdodge(dodge.width = 0.9),
             alpha=0.6)+
  geom_segment(data=FemCNrPredm,
               aes(x = 0.6, xend = 0.98,
                   y = predicted[1], yend = predicted[1]),
               color = "#0a0972", size = 1.4) +
  geom_segment(data=FemCNrPredm,
               aes(x = 0.79, xend = 0.79,
                   y = conf.low[1], yend = conf.high[1]),
               color = "#0a0972", size = 0.9) +
  geom_segment(data=FemCNrPredm,aes(x = 1.04, xend = 1.42, 
                                    y = predicted[2], yend = predicted[2]), 
               color = "#cf8232", size = 1.4) +
  geom_segment(data=FemCNrPredm,
               aes(x = 1.23, xend = 1.23,
                   y = conf.low[2], yend = conf.high[2]),
               color = "#cf8232", size = 0.9) +
  geom_segment(data=FemCNrPredm,aes(x = 1.62, xend = 2.00, 
                                    y = predicted[3], yend = predicted[3]), 
               color = "#0a0972", size = 1.4) +
  geom_segment(data=FemCNrPredm,
               aes(x = 1.81, xend = 1.81,
                   y = conf.low[3], yend = conf.high[3]),
               color = "#0a0972", size = 0.9) +
  geom_segment(data=FemCNrPredm,aes(x = 2.04, xend = 2.42, 
                                    y = predicted[4], yend = predicted[4]), 
               color = "#cf8232", size = 1.4) +
  geom_segment(data=FemCNrPredm,
               aes(x = 2.23, xend = 2.23,
                   y = conf.low[4], yend = conf.high[4]),
               color = "#cf8232", size = 0.9)  +
  scale_color_manual(values=c("#0053d9", "#ffc832"))+
  scale_x_discrete(labels=c("unflanged", "flanged"))+
  theme_cowplot()+
  theme(legend.position = c(0.65,0.8),
        legend.box.background = element_rect(colour="black"),
        legend.box.margin = margin(2,4,2,4),
        title=element_text(size=12))+
  labs(x="Male morph",
       y="Mean number of females in association
per full-day focal follow",
title="Cross-sectional data")+
  guides(size="none")+
  scale_y_continuous(breaks=c(0, 1, 2))

FemNrFig2 <- plot_grid(FemNrLongi, FemNrCross,
                       labels = c('A.','B.'),
                       align="l",
                       rel_heights = c(1,1),
                       ncol = 2)

ggsave("Fig2_FemNr_Jan23_flanged.tiff",
       plot = FemNrFig2,
       device = "tiff",
       scale = 1.1,
       width = 160,
       height = 100,
       units = "mm",
       dpi = 300)

# Survival analysis on maintenance of multi-day associations with females --------

# only take days of "last" association 
# (the male was observed without a female associate the next day) 

lastDay <- droplevels(subset(dd, LastAssocDay =="yes"))
lastDay$zFAI <- ave(lastDay$FAI, by="Site", FUN=scale)
lastDay1 <- droplevels(subset(lastDay, !is.na(zFAI)))

aggregate(lastDay1$ConsecutiveDaysAssocFem,
          by=list(lastDay1$Site,
                  lastDay1$Morph),
          FUN=mean, na.rm=TRUE)

AssSurv <- Surv(time=lastDay1$ConsecutiveDaysAssocFem,
                event=lastDay1$NextdaySeen)
ass.coxme0 <- coxme(AssSurv ~  1+
                      (1|ID/Year/Month), 
                    data=lastDay1)
ass.coxme <- coxme(AssSurv ~  Site+Morph+zFAI+YearSinceFirstSeenTab+
                     (1|ID/Year/Month), 
                   data=lastDay1)

# model comparisons
anova(ass.coxme0, ass.coxme)
summary(ass.coxme)
confint(ass.coxme)

AIC(ass.coxme) - AIC(ass.coxme0)

# counts of random intercepts
plyr::count(lastDay1$ID)
plyr::count(lastDay1, vars=c("Year", "ID"))
plyr::count(lastDay1, vars=c("Year", "Month", "ID"))

### Figure 3
assMain <- survfit(AssSurv ~  Site+Morph, data=lastDay1)

ggsurvAssMain <- ggsurvplot(assMain, data = lastDay1,
                            size = 1,                 # change line size
                            palette = c("#8e7cc3","#351c75"),
                            conf.int = TRUE,          # Add confidence interval
                            legend.labs = c("unflanged", "flanged"), 
                            facet.by = "Site",
                            legend.title="Associations with females by male morph",
                            xlab = "Days since first observed in association",
                            ylab ="Probability of ending consecutive 
association days with females", 
                            panel.labs = list("Suaq", "Tuanan"),
                            risk.table.height = 0.4, # Useful to change when you have multiple groups
                            censor.shape="|", censor.size=3,
                            ggtheme = theme_classic(),
                            fun="event",
                            break.time.by=2)
Fig3 <- ggpar(ggsurvAssMain,
              font.x = c(12),
              font.y = c(12),
              font.caption = c(10), 
              panel.labs.font = c(12),
              font.legend = c(12),
              font.tickslab = c(11, "black"),
              font.family = "Arial")

ggsave("Fig3_SurvAnalysis_Apr23.tiff",
       plot = Fig3,
       device = "tiff",
       scale = 0.9,
       width = 160,
       height = 120,
       units = "mm",
       dpi = 300)


# Proximity to females - longitudinal data set ----------------------------------------

longiAssoc <- droplevels(subset(NNL2, NrFemAssoc>0)) # only days with females in association
longiProx <- droplevels(subset(longiAssoc, !is.na(BoutsWithin10))) # exclude data where proximity not available
longiProx1 <- droplevels(subset(longiProx, !is.na(FLMpresent))) # exclude data where no info if additional male present
longiProx1$FLMpr <- ifelse(longiProx1$FLMpresent>0, "additional flm present", "no additional flm")
longiProx1$FemBouts <- round(longiProx1$FemAssocH*30,0) # re-transform cumulative association hours to 2-min bouts
longiProx1$notWithin10 <- longiProx1$FemBouts - longiProx1$BoutsWithin10

## binomial model is overdispersed
## betabinomial model fitted instead
longiProx1$Prop10 <- longiProx1$BoutsWithin10/longiProx1$FemBouts

# null model
w10.0b <- glmmTMB(Prop10 ~ 1 +
                    + (1|ID/Year/Month),
                  weights = FemBouts,
                  data=longiProx1, 
                  family=betabinomial(link="logit"))

# full model
w10b <- glmmTMB(Prop10~ Site++zFAI+YearSinceFlanged+FLMpresent+UFMpresent+Fertility+ConsecutiveDaysAssocFem
                + (1|ID/Year/Month),
                weights = FemBouts,
                data=longiProx1, 
                family=betabinomial(link="logit"))

# full model with interactions
w10IAb <- glmmTMB(Prop10~ Site+zFAI+YearSinceFlanged*FLMpresent+UFMpresent+Fertility+ConsecutiveDaysAssocFem
                  + (1|ID/Year/Month),
                  weights = FemBouts,
                  data=longiProx1, 
                  family=betabinomial(link="logit"))

# model comparison
anova(w10.0b, w10b, w10IAb) 
AIC(w10b) - AIC(w10.0b)

# summary of best fiting model
summary(w10b)
confint(w10b, method="Wald")

# check model assumptions
plot(resid(w10b))
check_collinearity(w10b) # low correlation
check_overdispersion(w10b) # still overdispersed
res <-simulateResiduals(w10b, plot = T) # quantile deviation detected

ProxLongiPred <- ggpredict(w10b, terms = c("YearSinceFlanged", "FLMpresent"))


# Figure - suppl. mat.
longiProxFem <- ggplot()+
  geom_point(data=longiProx1,
             mapping=aes(YearSinceFlanged, Prop10, size=FemBouts, pch=Site, col=Morph),
             alpha=0.7)+
  geom_ribbon(data= ProxLongiPred,
              mapping = aes(x=x, ymin=conf.low, ymax=conf.high, fill=group),
              alpha=0.1)+
  geom_line(data=ProxLongiPred, 
            mapping=aes(x, predicted, lty=group), size=1.2)+
  theme_set(theme_cowplot())+
  guides(size=FALSE)+
  theme_set(theme_cowplot())+
  theme(legend.box.background = element_rect(colour="black"),
        legend.box.margin = margin(2,4,2,4),
        title=element_text(size=12),
        legend.position = "top")+
  geom_vline(xintercept=0, col="#757575", lty=2)+
  annotate("text", x= -5, y=-0.05, #fontface="italic", 
           label= "unflanged", col="#757575")+
  annotate("text", x= 5, y=-0.05, #fontface="italic", 
           label= "flanged", col="#757575")+
  labs(x="Years to/since flanged",
       y="Proportion of association bouts 
within 10m of females",
title = "Longitudinal data") +
  scale_colour_manual(values=c("#A496CF","#3c2375"))+
  scale_fill_manual("FLM present", values=c("#999999", "#2d2d2d"),
                    labels=c("no", "yes"))+
  scale_linetype_manual("FLM present", values=c(1, 3),
                        labels=c("no", "yes"))+
  guides(col="none", pch="none")

# Proximity to females - cross-sectional data set -------------------------------------

crossAssoc <- droplevels(subset(cross1, NrFemAssoc>0)) # subset with only female associates
crossProx <- droplevels(subset(crossAssoc, !is.na(BoutsWithin10))) # exclude NA observations
crossProx$FemBouts <- round(crossProx$FemAssocH*30,0) # transform hours back to 2-min instantaneous bouts (which hours are based on)
crossProx$notWithin10 <- crossProx$FemBouts - crossProx$BoutsWithin10

### binomial model is overdispersed, try betabinomial instead
crossProx$Prop10 <- crossProx$BoutsWithin10/crossProx$FemBouts

# null model
w10.0b <- glmmTMB(cbind(BoutsWithin10,notWithin10) ~ 1+
                    + (1|ID/Year/Month),
                  data=crossProx, 
                  family=betabinomial(link="logit"))

# full model
w10b <- glmmTMB(cbind(BoutsWithin10,notWithin10)~ Site+zFAI+Morph+FLMpresent+UFMpresent+Fertility+ConsecutiveDaysAssocFem+YearSinceFirstSeenTab+
                  + (1|ID/Year/Month),
                data=crossProx, 
                family=betabinomial(link="logit"))

# full model with interaction term
w10IAb <- glmmTMB(cbind(BoutsWithin10,notWithin10)~ Site+zFAI+Morph*FLMpresent+UFMpresent+YearSinceFirstSeenTab+Fertility+ConsecutiveDaysAssocFem
                    + (1|ID/Year/Month),
                  data=crossProx, 
                  family=betabinomial(link="logit"))

# full model with interaction term
w10IAb2 <- glmmTMB(cbind(BoutsWithin10,notWithin10)~ Site+zFAI+Morph*FLMpresent+UFMpresent+Fertility+ConsecutiveDaysAssocFem+Morph*YearSinceFirstSeenTab
                  + (1|ID/Year/Month),
                  data=crossProx, 
                  family=betabinomial(link="logit"))

# full model with additional interaction term
# improves model fit, but with very high VIF
w10IA1b <- glmmTMB(cbind(BoutsWithin10,notWithin10)~ Site+zFAI+Morph*FLMpresent+UFMpresent+Morph*Fertility+ConsecutiveDaysAssocFem+YearSinceFirstSeenTab
                     + (1|ID/Year/Month),
                   data=crossProx, 
                   family=betabinomial(link="logit"))

anova(w10.0b, w10b,w10IAb, w10IAb2)
anova(w10.0b, w10b,w10IAb, w10IA1b)
summary(w10IAb) # interaction Morph*Fertility improves model fit, but high collinearity
confint(w10IAb)

anova(w10.0b,w10IAb)
AIC(w10.0b) - AIC(w10IAb)

## check model assumptions
plot(resid(w10IAb))
check_collinearity(w10IAb) # low correlation
check_overdispersion(w10IAb) # still overdispersed
res <-simulateResiduals(w10IAb, plot = T) # look okay

## model predictions
ProxCross <- ggpredict(w10IAb, terms=c("Morph", "Site", "FLMpresent"))

sumProxcr <- crossProx %>% group_by(ID, Morph, Site) %>% dplyr::summarize(Prop10mean = mean(Prop10),
                                                                          N = n())

# Figure reported in MS
lty <- c("no flm present" = 1, "flm present" = 3)
FemProxCross <- ggplot()+
  geom_point(data= sumProxcr,
             aes(Morph, Prop10mean, pch=Site, col=Site, size=N),
             position=position_jitterdodge(dodge.width = 0.9),
             alpha=0.6)+
  geom_segment(data=ProxCross,
               aes(x = 0.59, xend = 0.97,
                   y = predicted[1], yend = predicted[1]),
               color = "#0a0972", size = 1.4) +
  geom_segment(data=ProxCross,
               aes(x = 0.78, xend = 0.78,
                   y = conf.low[1], yend = conf.high[1]),
               color = "#0a0972", size = 0.9) +
  geom_segment(data=ProxCross,
               aes(x = 0.61, xend = 0.99,
                   y = predicted[2], yend = predicted[2]),
               color = "#8685b9", size = 1.4, lty=3) +
  geom_segment(data=ProxCross,
               aes(x = 0.8, xend = 0.8,
                   y = conf.low[2], yend = conf.high[2]),
               color = "#8685b9", size = 0.9, lty=3) +
  geom_segment(data=ProxCross,aes(x = 1.03, xend = 1.41, 
                                  y = predicted[3], yend = predicted[3]), 
               color = "#cf8232", size = 1.4) +
  geom_segment(data=ProxCross,
               aes(x = 1.22, xend = 1.22,
                   y = conf.low[3], yend = conf.high[3]),
               color = "#cf8232", size = 0.9) +
  geom_segment(data=ProxCross,aes(x = 1.05, xend = 1.43, 
                                  y = predicted[4], yend = predicted[4]), 
               color = "#e2b484", size = 1.4, lty=3) +
  geom_segment(data=ProxCross,
               aes(x = 1.24, xend = 1.24,
                   y = conf.low[4], yend = conf.high[4]),
               color = "#e2b484", size = 0.9, lty=3) +
  geom_segment(data=ProxCross,aes(x = 1.63, xend = 2.01, 
                                  y = predicted[6], yend = predicted[6]), 
               color = "#8685b9", size = 1.4, lty=3) +
  geom_segment(data=ProxCross,
               aes(x = 1.82, xend = 1.82,
                   y = conf.low[6], yend = conf.high[6]),
               color = "#8685b9", size = 0.9, lty=3) +
  geom_segment(data=ProxCross,aes(x = 1.61, xend = 1.99, 
                                  y = predicted[5], yend = predicted[5]), 
               color = "#0a0972", size = 1.4) +
  geom_segment(data=ProxCross,
               aes(x = 1.8, xend = 1.8,
                   y = conf.low[5], yend = conf.high[5]),
               color = "#0a0972", size = 0.9) +
  geom_segment(data=ProxCross,aes(x = 2.05, xend = 2.43, 
                                  y = predicted[8], yend = predicted[8]), 
               color = "#e2b484", size = 1.4, lty=3) +
  geom_segment(data=ProxCross,
               aes(x = 2.24, xend = 2.24,
                   y = conf.low[8], yend = conf.high[8]),
               color = "#e2b484", size = 0.9, lty=3)  +
  geom_segment(data=ProxCross,aes(x = 2.03, xend = 2.41, 
                                  y = predicted[7], yend = predicted[7]), 
               color = "#cf8232", size = 1.4) +
  geom_segment(data=ProxCross,
               aes(x = 2.22, xend = 2.22,
                   y = conf.low[7], yend = conf.high[7]),
               color = "#cf8232", size = 0.9)  +
  scale_color_manual(values=c("#0053d9", "#ffc832"))+
  scale_x_discrete(labels=c("unflanged", "flanged"))+
  theme_cowplot()+
  theme(legend.position = "top",
        legend.box.background = element_rect(colour="black"),
        legend.box.margin = margin(2,4,2,4),
        title=element_text(size=12))+
  labs(x="Male morph",
       y="Proportion of association bouts 
within 10m of females",
title="Cross-sectional data")+
  guides(size="none")

Fig4 <- plot_grid(longiProxFem, FemProxCross,
                  labels = c('A.','B.'),
                  align="l",
                  rel_widths = c(1.2,1),
                  ncol = 2)

ggsave("Fig4_Prox_Jan23_betabin_flanged.tiff",
       plot = Fig4,
       scale=1.1,
       width = 200,
       height = 100,
       units = "mm",
       dpi = 300)

# Copulation frequency - longitudinal data set ----------------------------------------------------

## based on male full-day focal follow data --> individual male copulation rates
NNL2$zAssoc <- scale(NNL2$FemAssocH)

## testing twice copulation number
## Bonferroni adjusted P value: 0.025

## very few copulations at the Suaq population with 
## Poisson and binomal GLMM model result in singular fits, even when reduced to minimum
## Negative binomial model works
CopRateNB0 <- glmmTMB(CopNr ~ 1+ (1|ID),
                      #offset=log(ObsTime),
                      data=NNL2,
                      family="nbinom1")

CopRateNB <- glmmTMB(CopNr ~ Site + zFAI+ YearSinceFlanged + zAssoc+ (1|ID),
                     #offset=log(ObsTime),
                     data=NNL2,
                     family="nbinom1")

CopRateNBIA <- glmmTMB(CopNr ~ Site  + zFAI+YearSinceFlanged*zAssoc+ (1|ID),
                       #offset=log(ObsTime),
                       data=NNL2,
                       family="nbinom1")

anova(CopRateNB0, CopRateNB, CopRateNBIA) # with Bonferroni adjusted p-values interaction does not improved full model
anova(CopRateNB0, CopRateNBIA) 
summary(CopRateNB)
confint(CopRateNB)
r.squaredGLMM(CopRateNB)
AIC(CopRateNB)- AIC(CopRateNB0)


## Check model assumptions
check_overdispersion(CopRateNB) # no evidence for overdispersion
check_collinearity(CopRateNB) # low correlation
plot(resid(CopRateNB))
res <-simulateResiduals(CopRateNB, plot = T) # look okay

### Only days when in association with females
## Add association days only (when not focal)
## Copulation rate per time in association with females

longiALL <- droplevels(subset(dd, transition=="yes")) ## exclude cross-sectional data
AssocLongiF <- droplevels(subset(longiALL, FemAssocH>0)) # only days when in association with females
AssocLongiF1 <- droplevels(subset(AssocLongiF, !is.na(FAI))) # exclude days where FAI unknown
AssocLongiF1$zFAI <- ave(AssocLongiF1$FAI, AssocLongiF1$Site, FUN=scale) 
AssocLongiF1$zFemH <- scale(AssocLongiF1$FemAssocH)
AssocLongi <- droplevels(subset(AssocLongiF1, !is.na(YearSinceFlanged)))

CopAssoc0 <- glmmTMB(CopNr ~ 1+
                       + (1|ID/Year),
                     offset=log(FemAssocH),
                     data=AssocLongi,
                     family=poisson(link="log"))
CopAssoc <- glmmTMB(CopNr ~ Site + YearSinceFlanged+ Fertility+zFAI+ 
                      + (1|ID/Year),
                    offset=log(FemAssocH),
                    data=AssocLongi,
                    family=poisson(link="log"))

summary(CopAssoc)
anova(CopAssoc0, CopAssoc)
confint(CopAssoc)
r.squaredGLMM(CopAssoc)
AIC(CopAssoc) - AIC(CopAssoc0)

# check model assumptions
plot(resid(CopAssoc))
check_collinearity(CopAssoc) # low correlation
check_overdispersion(CopAssoc) # no evidence for overdispersion
res <-simulateResiduals(CopAssoc, plot = T) # some quantile deviations detected

# Figure 3A
AssocLongi$CopRateAssoc <- AssocLongi$CopNr/AssocLongi$FemAssocH

CopAssocLPred <- ggpredict(CopAssoc, terms = c("YearSinceFlanged"))

# summarize data
sumLongiCopAssoc <- AssocLongi %>% 
  group_by(Site, YearSinceFlanged, ID) %>% 
  dplyr::summarize(mean_CopRateAssoc = mean(CopRateAssoc),
                   N = n())

LongiCopA <- ggplot()+
  geom_jitter(data=AssocLongi,
              mapping= aes(YearSinceFlanged, CopRateAssoc, col=Morph, pch=Site), width=0.15, #height=0.1,
              alpha=0.8)+
  geom_ribbon(data= CopAssocLPred,
              mapping = aes(x=x, ymin=conf.low, ymax=conf.high),
              alpha=0.1)+
  geom_line(data= CopAssocLPred,
            mapping = aes(x, predicted), size=1.2)+
  theme_set(theme_cowplot())+
  theme(legend.box.background = element_rect(colour="black"),
        legend.box.margin = margin(2,4,2,4),
        title=element_text(size=12),
        legend.position = c(0.65,0.9))+
  geom_vline(xintercept=0, col="#757575", lty=2)+
  annotate("text", x= -5, y=-0.22, 
           label= "unflanged", col="#757575")+
  annotate("text", x= 5, y=-0.22,
           label= "flanged", col="#757575")+
  labs(x="Years to/since flanged",
       y="Copulation rate per association hour",
       title = "Longitudinal data") +
  scale_colour_manual(values=c("#A496CF","#3c2375"))+
  guides(col="none")

### from figure it appears as if point 1965 is an outlier (with copulation rate 4)
### a male that was in a short association and copulated once
### check if patterns prevail when excluding this data point

CopAssocCheck <- glmmTMB(CopNr ~ Site + YearSinceFlanged+ Fertility+zFAI+ 
                           + (1|ID/Year) +offset(log(FemAssocH)),
                         data=subset(AssocLongi,CopRateAssoc<4),
                         family="poisson" (link=log))
summary(CopAssocCheck) # patterns prevail

# Copulation frequency - cross-sectional data set ------------------------------

## testing twice copulation number
## Bonferroni adjusted P value: 0.025

## based on male full-day focal follow data --> individual male copulation rates
## only 64 copulations in data set and 8 at Suaq and 56 Tuanan

CopRate0 <- glmmTMB(CopNr ~ 1+ 
                      (1|ID/Year/Month),
                    #offset=log(ObsTime), # all data based on full-day focal follows (i.e. 24h)
                    data=cross1,
                    family=poisson(link="log"))

CopRate <- glmmTMB(CopNr ~ Site + Morph+YearSinceFirstSeenTab +zFAI+ zAssoc+UFMpresent+FLMpresent+
                     (1|ID/Year/Month),
                   #offset=log(ObsTime),
                   data=cross1,
                   family=poisson(link="log"))


### check if interaction improves model fit
CopRateIA <- glmmTMB(CopNr ~ Site*Morph + YearSinceFirstSeenTab +zFAI+zAssoc+ UFMpresent+FLMpresent+
                       (1|ID/Year/Month),
                     #offset=log(ObsTime),
                     data=cross1,
                     family=poisson(link="log"))

anova(CopRate0, CopRate, CopRateIA)
summary(CopRateIA)
confint(CopRateIA)
r.squaredGLMM(CopRateIA)
AIC(CopRate0) - AIC(CopRateIA)
anova(CopRate0, CopRateIA)

# check model assumptions
check_collinearity(CopRateIA) # moderate correlation for interaction term and year since first obs
check_overdispersion(CopRateIA) ## no significant overdispersion
res <-simulateResiduals(CopRateIA, plot = T) ## looks good

## copulation rates reported in MS
cross1$CopRate <-cross1$CopNr/cross1$ObsTime 

CopID <- aggregate(cross1$CopRate, 
                   by=list(cross1$Site,
                           cross1$Morph,
                           cross1$ID),
                   FUN=mean, na.rm=TRUE)
names(CopID) <- c("Site", "Morph", "ID", "CopRate")

# descriptive stats for MS
aggregate(CopID$CopRate,
          by=list(CopID$Morph),
          FUN=mean)

aggregate(CopID$CopRate, 
          by=list(CopID$Morph),
          FUN=sd)

sum(cross1$CopNr) # total number of copulations in data set

### Only take days, when in association with females
## add association days only (when not focal)
## copulation rate per time in association with females

crossALL <- droplevels(subset(dd, transition=="no")) ## exclude longitudinal data
AssocF <- droplevels(subset(crossALL, FemAssocH>0)) ## only days when in association with females
AssocF1 <- droplevels(subset(AssocF, !is.na(FAI))) ## exclude data when FAI data not available
AssocF1$zFAI <- ave(AssocF1$FAI, AssocF1$Site, FUN=scale) ## scale FAI within site

CopAssoc0 <- glmmTMB(CopNr ~ 1+
                       + (1|ID/Year/Month),
                     offset=log(FemAssocH),
                     data=AssocF1,
                     family=poisson(link="log"))

CopAssoc <- glmmTMB(CopNr ~ Site +zFAI+ Morph+ Fertility+YearSinceFirstSeenTab +
                      + (1|ID/Year/Month),
                    offset=log(FemAssocH),
                    data=AssocF1,
                    family=poisson(link="log"))

CopAssocIA <- glmmTMB(CopNr ~ Site*Morph +YearSinceFirstSeenTab +zFAI+ Fertility+
                        + (1|ID/Year/Month),
                      offset=log(FemAssocH),
                      data=AssocF1,
                      family=poisson(link="log"))

anova(CopAssoc0, CopAssoc, CopAssocIA) # with Bonferroni adjusted P value of 0.025 interaction does not sign. improve model fit
summary(CopAssocIA)
confint(CopAssocIA)
r.squaredGLMM(CopAssocIA)
anova(CopAssoc0,CopAssocIA)
AIC(CopAssocIA) - AIC(CopAssoc0)

# check model assumptions
plot(resid(CopAssocIA))
check_collinearity(CopAssocIA) # low correlation
check_overdispersion(CopAssocIA) # no evidence for overdispersion
res <-simulateResiduals(CopAssocIA, plot = T) ## dispersion test sign., otherwise okay

## check average copulation nr by site and morph (as interaction is significant)
## copulation rates reported in MS
AssocF1$CopRate <-AssocF1$CopNr/AssocF1$FemAssocH 
CopID <- aggregate(AssocF1$CopRate, 
                   by=list(AssocF1$Site,
                           AssocF1$Morph,
                           AssocF1$ID),
                   FUN=mean, na.rm=TRUE)
names(CopID) <- c("Site", "Morph", "ID", "CopRate")

# copulation rates reported in MS
aggregate(CopID$CopRate,
          by=list(CopID$Morph),
          FUN=mean)

aggregate(CopID$CopRate, 
          by=list(CopID$Morph),
          FUN=sd)

# Total observed copulations per Follow Type
aggregate(crossALL$CopNr,
          by=list(crossALL$FollowType),
          FUN=sum)

# Figure 4B

#summarize raw data by individual
sumCrCopAssoc <- AssocF1 %>% 
  group_by(Site, Morph, ID) %>%
  dplyr::summarize(mean_CopRateAssoc = mean(CopRate),
                   N = n())

# model predictions
CopAssocCross <- ggpredict(CopAssocIA, terms =c("Morph", "Site"))

CrossCopAssoc <- ggplot()+
  geom_point(data=sumCrCopAssoc,
             mapping=aes(Morph, mean_CopRateAssoc, size=N, col=Site, pch=Site),
             position= position_jitterdodge(dodge.width = 0.9),
             alpha=0.5)+
  geom_segment(data=CopAssocCross,
               aes(x = 0.6, xend = 0.98,
                   y = predicted[1], yend = predicted[1]),
               color = "#0a0972", size = 1.4) +
  geom_segment(data=CopAssocCross,
               aes(x = 0.79, xend = 0.79,
                   y = conf.low[1], yend = conf.high[1]),
               color = "#0a0972", size = 0.9) +
  geom_segment(data=CopAssocCross,aes(x = 1.04, xend = 1.42, 
                                      y = predicted[2], yend = predicted[2]), 
               color = "#cf8232", size = 1.4) +
  geom_segment(data=CopAssocCross,
               aes(x = 1.23, xend = 1.23,
                   y = conf.low[2], yend = conf.high[2]),
               color = "#cf8232", size = 0.9) +
  geom_segment(data=CopAssocCross,aes(x = 1.62, xend = 2.00, 
                                      y = predicted[3], yend = predicted[3]), 
               color = "#0a0972", size = 1.4) +
  geom_segment(data=CopAssocCross,
               aes(x = 1.81, xend = 1.81,
                   y = conf.low[3], yend = conf.high[3]),
               color = "#0a0972", size = 0.9) +
  geom_segment(data=CopAssocCross,aes(x = 2.04, xend = 2.42, 
                                      y = predicted[4], yend = predicted[4]), 
               color = "#cf8232", size = 1.4) +
  geom_segment(data=CopAssocCross,
               aes(x = 2.23, xend = 2.23,
                   y = conf.low[4], yend = conf.high[4]),
               color = "#cf8232", size = 0.9)  +
  scale_color_manual(values=c("#0053d9", "#ffc832"))+
  scale_x_discrete(labels=c("unflanged", "flanged"))+
  theme_set(theme_cowplot())+
  theme(legend.box.background = element_rect(colour="black"),
        legend.box.margin = margin(2,4,2,4),
        title=element_text(size=12),
        legend.position = c(0.65,0.9))+
  labs(x="Male morph",
       y="Copulation rate per association hour",
       title="Cross-sectional data") +
  guides(size="none")

Fig5 <- plot_grid(LongiCopA, CrossCopAssoc,
                  labels = c('A.','B.'),
                  rel_widths = c(1,1),
                  align="l",
                  ncol = 2)

ggsave("Fig5_CopNr_Jan23_flanged.tiff",
       plot = Fig5,
       device = "tiff",
       scale=1.1,
       width = 160,
       height = 100,
       units = "mm",
       dpi = 300)

# Proportion of copulations that were forced ------------------------------

#including both males that have transitioned and non-trans males
CopF <- aggregate(cbind(dd$CopNr, dd$ForcedCop,
                        dd$ObsTime,
                        dd$FemAssocH),
                  by=list(dd$Site,
                          dd$ID,
                          dd$Morph,
                          dd$transition),
                  FUN=sum, na.rm=TRUE)
names(CopF) <- c("Site", "ID", "Morph", "transition",
                 "TotCop", "TotForced", "TotObs", 
                 "FemH")

CopF$notForced <- CopF$TotCop-CopF$TotForced # calculate numbers of copulations that were not forced
CopF$zFemH <- ave(CopF$FemH, by="Site", FUN=scale) ## z transform data within site (suaq with lower overall numbers, and otherwise collinearity issues site and assoc time)
CopObs <- subset(CopF, TotCop>0) ## only include males that were observed to copulate

# null model
propF0 <- glmmTMB(cbind(TotForced, notForced) ~ 1+
                    (1|ID),
                  data=CopObs,
                  family=binomial(link="logit"))

# full model
propF <- glmmTMB(cbind(TotForced, notForced) ~ Site+Morph+zFemH+transition+
                   (1|ID),
                 data=CopObs,
                 family=binomial(link="logit"))


# interaction model
propFIA <- glmmTMB(cbind(TotForced, notForced) ~ Site*Morph+zFemH+transition+
                     (1|ID),
                   data=CopObs,
                   family=binomial(link="logit"))

# alternative way of specifying model with proportion and weights
propFIAalt <- glmmTMB(TotForced/TotCop ~ Site*Morph+zFemH+transition+
                        (1|ID),
                      data=CopObs,
                      weights = TotCop,
                      family=binomial(link="logit"))

anova(propF0, propF, propFIA) # interaction site*morph improves model fit
anova(propF0, propFIA)
summary(propFIA)
confint(propFIA)
r.squaredGLMM(propFIA)
AIC(propFIA) - AIC(propF0)


# check model assumptions
check_collinearity(propFIA)
check_overdispersion(propFIA) # overdispersion should not be an issue, according to test
plot(resid(propFIA))
res <-simulateResiduals(propFIA, plot = T) # looks ok

# model predictions
ForcedPred <- ggpredict(propFIA, terms=c("Morph", "Site"))

# Figure 4
CrossForced <- ggplot(CopObs, 
                      aes(Morph, TotForced/TotCop))+
  geom_point(aes(size=TotCop, col=Site, pch=Site),
             position= position_jitterdodge(dodge.width = 0.9),
             alpha=0.7)+
  geom_segment(data=ForcedPred,
               aes(x = 0.6, xend = 0.98,
                   y = predicted[1], yend = predicted[1]),
               color = "#0a0972", size = 1.4) +
  geom_segment(data=ForcedPred,
               aes(x = 0.79, xend = 0.79,
                   y = conf.low[1], yend = conf.high[1]),
               color = "#0a0972", size = 0.9) +
  geom_segment(data=ForcedPred,aes(x = 1.04, xend = 1.42, 
                                   y = predicted[2], yend = predicted[2]), 
               color = "#cf8232", size = 1.4) +
  geom_segment(data=ForcedPred,
               aes(x = 1.23, xend = 1.23,
                   y = conf.low[2], yend = conf.high[2]),
               color = "#cf8232", size = 0.9) +
  geom_segment(data=ForcedPred,aes(x = 1.62, xend = 2.00, 
                                   y = predicted[3], yend = predicted[3]), 
               color = "#0a0972", size = 1.4) +
  geom_segment(data=ForcedPred,
               aes(x = 1.81, xend = 1.81,
                   y = conf.low[3], yend = conf.high[3]),
               color = "#0a0972", size = 0.9) +
  geom_segment(data=ForcedPred,aes(x = 2.04, xend = 2.42, 
                                   y = predicted[4], yend = predicted[4]), 
               color = "#cf8232", size = 1.4) +
  geom_segment(data=ForcedPred,
               aes(x = 2.23, xend = 2.23,
                   y = conf.low[4], yend = conf.high[4]),
               color = "#cf8232", size = 0.9)  +
  scale_color_manual(values=c("#0053d9", "#ffc832"))+
  scale_x_discrete(labels=c("unflanged", "flanged"))+
  guides(size="none")+
  theme_set(theme_cowplot())+
  theme(legend.box.background = element_rect(colour="black"),
        legend.box.margin = margin(2,4,2,4),
        title=element_text(size=12),
        legend.text=element_text(size=10),
        legend.position = "top")+
  labs(x="Male morph",
       y="Proportion of copulations that were forced")

ggsave("Fig5_Forced_Dec22.tiff",
       plot = CrossForced,
       device = "tiff",
       scale = 1.3,
       width = 60,
       height = 80,
       units = "mm",
       dpi = 300)

### percentages for MS
sum(CopObs$TotCop) # total copulations
plyr::count(CopObs$ID) # Nr of males
plyr::count(CopObs$ID[CopObs$Site=="Suaq"]) # Nr males Suaq
plyr::count(CopObs$ID[CopObs$Site=="Tuanan"]) # Nr males Tuanan

# Mean proportion of forced copulations 
CopObs$forcedProp <-CopObs$TotForced/CopObs$TotCop

library(Hmisc)
binconf(x=mean(CopObs$TotForced), n=mean(CopObs$TotCop), alpha=0.05)
binconf(x=mean(CopObs$TotForced[CopObs$Site=="Suaq"]), 
        n=mean(CopObs$TotCop[CopObs$Site=="Suaq"]), alpha=0.05)
binconf(x=mean(CopObs$TotForced[CopObs$Site=="Tuanan"]), 
        n=mean(CopObs$TotCop[CopObs$Site=="Tuanan"]), alpha=0.05)

binconf(x=mean(CopObs$TotForced[CopObs$Morph=="ufm"]), 
        n=mean(CopObs$TotCop[CopObs$Morph=="ufm"]), alpha=0.05)
binconf(x=mean(CopObs$TotForced[CopObs$Morph=="flm"]), 
        n=mean(CopObs$TotCop[CopObs$Morph=="flm"]), alpha=0.05)

table(CopObs$Site, CopObs$Morph)

# Reporting numbers for MS - males that force all copulations
table(CopObs$Morph) # count of morphs

# subset of males that force all copulations
allforced <- droplevels(subset(CopObs, forcedProp==1))
table(allforced$Morph)
table(allforced$Morph, allforced$TotCop)

# subset of males that have been observed to copulate 3 times or more
more3 <- droplevels(subset(CopObs, TotCop >=3))
table(more3$Morph)

binconf(x=mean(more3$TotForced[more3$Morph=="ufm"]), 
        n=mean(more3$TotCop[more3$Morph=="ufm"]), alpha=0.05)
binconf(x=mean(more3$TotForced[more3$Morph=="ufm" & more3$Site=="Tuanan"]), 
        n=mean(more3$TotCop[more3$Morph=="ufm" & more3$Site=="Tuanan"]), alpha=0.05)
binconf(x=mean(more3$TotForced[more3$Morph=="ufm" & more3$Site=="Suaq"]), 
        n=mean(more3$TotCop[more3$Morph=="ufm"& more3$Site=="Suaq"]), alpha=0.05)

table(more3$TotForced[more3$Morph=="flm"])
binconf(x=mean(more3$TotForced[more3$Morph=="flm"]), 
        n=mean(more3$TotCop[more3$Morph=="flm"]), alpha=0.05)
binconf(x=mean(more3$TotForced[more3$Morph=="flm" & more3$Site=="Tuanan"]), 
        n=mean(more3$TotCop[more3$Morph=="flm" & more3$Site=="Tuanan"]), alpha=0.05)
binconf(x=mean(more3$TotForced[more3$Morph=="flm" & more3$Site=="Suaq"]), 
        n=mean(more3$TotCop[more3$Morph=="flm"& more3$Site=="Suaq"]), alpha=0.05)

# total focal observation hours
aggregate(CopF$TotObs, 
          by=list(CopF$Site, CopF$Morph), FUN=sum)

# Mean focal observation hours per ID/morph
aggregate(CopF$TotObs, 
          by=list(CopF$Site, CopF$Morph), FUN=mean)
aggregate(CopF$TotObs, 
          by=list(CopF$Site, CopF$Morph), FUN=sd)
table(CopF$Site, CopF$Morph)

sum(CopF$TotObs)

# ALL transition plots ---------------------------------------------------

AllFigHor <- plot_grid(FemNrLongi, longiProxFem, LongiCopA,
                       FemNrCross, FemProxCross, CrossCopAssoc,
                         labels = c('A.','B.','C.',
                                    'D.', 'E.','F.'),
                         align="l",
                         ncol = 3, nrow=2)

ggsave("Fig2_Mar23_Hor.tiff",
       plot = AllFigHor,
       device = "tiff",
       scale = 0.9,
       width = 300,
       height = 210,
       units = "mm",
       dpi = 300)

